#!/bin/bash
#../bin/phantomjs ./getalexa.js | grep yes |  awk '/yes/{print $2" "$3}' > ./urls
# Loop bound: the main loop keeps fetching pages while cnt is below max.
max='1000'
# Running total of "yes" lines collected so far (incremented after each page).
cnt='0'
# Page to fetch next; starts at the first listing page and is replaced by
# the URL that getnexturl reports after each page.
nexturl='http://top.chinaz.com/list.aspx?fn=alexa'
# Run getnexturl.js against the current page and capture the URL it reports.
# Globals:
#   nexturl    (read)    - URL passed to getnexturl.js
#   tmpnexturl (written) - second field of the output line(s) that start
#                          with "next"; empty if no such line is produced
# Outputs:
#   Two progress lines on stdout (the URL being queried and the result).
getnexturl() {
    # Expansions are quoted so the "?" in the URL is never treated as a glob
    # pattern and the logged text always matches the real value.
    printf '%s\n' "getting nextpageurl from $nexturl"
    tmpnexturl=$(../bin/phantomjs ./getnexturl.js "$nexturl" | awk '/^next/{print $2}')
    printf '%s\n' "nexturl:$tmpnexturl"
}
# Run getalexa.js against the current page and store its raw output.
# Globals:
#   cnt     (read) - running count, shown in the progress line
#   nexturl (read) - URL passed to getalexa.js
# Outputs:
#   One progress line on stdout; the script's stdout is written to
#   ./getalexa.tmp, overwriting any previous content.
getalexa() {
    # Quoted so the "?" in the URL cannot be glob-expanded in the log line.
    printf '%s\n' "$cnt : getting alexa data... from $nexturl"
    ../bin/phantomjs ./getalexa.js "$nexturl" > ./getalexa.tmp
}
# Main collection loop.
# Starting from $nexturl, repeatedly:
#   1. fetch the page with getalexa (retrying until it yields "yes" lines),
#   2. append fields 2..NF of every "yes" line to rawdata.alexa,
#   3. obtain the following page URL with getnexturl (retrying while the
#      helper reports the sentinel "wrongurl"),
# until at least $max "yes" lines have been collected.

# Start from a clean output file; -f keeps a first run (no file yet) quiet.
rm -f ./rawdata.alexa
while [[ $cnt -lt $max ]]; do
    getalexa
    num=$(grep -c '^yes' ./getalexa.tmp)
    while [[ "$num" == "0" ]]; do
        echo getalexa.js failed retrying...
        getalexa
        # Re-count after every retry; without this the loop could never
        # observe a successful fetch and would spin forever.
        num=$(grep -c '^yes' ./getalexa.tmp)
    done
    # Drop the leading "yes" marker and re-join the remaining fields with
    # single spaces.
    awk '/^yes/{for (i=2;i<NF;++i) printf "%s ", $i; printf "%s\n", $NF}' ./getalexa.tmp >> rawdata.alexa
    echo rawdata written
    getnexturl
    # NOTE(review): only the literal "wrongurl" is treated as failure; an
    # empty tmpnexturl is accepted as the next URL - confirm this is intended.
    while [[ "$tmpnexturl" == "wrongurl" ]]; do
        echo getnexturl.js failed retrying...
        getnexturl
    done
    nexturl=$tmpnexturl
    (( cnt += num ))
done
rm -f ./getalexa.tmp
